#--------------------------------------------------#
#   Data Validation AFI (McMann et al. Approach)   #
#--------------------------------------------------#

# Title: "Democratic Resilience in the Twenty-First Century. Search for an Analytical Framework and Explorative Analysis" #
# Authors: "Croissant, Aurel and Lott, Lars", Heidelberg University and FAU Erlangen-Nürnberg
# date: 2025-05-07
# journal: Political Studies
# DOI: 10.1177/00323217251345779
# written under "R version 4.5.0 (2025-04-11 ucrt)"

#### Preliminaries ####

R.version$version.string

# clear workspace
rm(list=ls())

# load packages
library(here)
library(tidyverse)
library(magrittr)
library(reshape2)
library(ggpubr)
library(countrycode)
library(ggrepel)
library(ggthemes)
library(lubridate)
library(dotwhisker)

library(coda)
library(MASS)
library(rjags)
library(parallel)
library(magrittr)
library(dplyr)
library(tools)
library(devtools)

## Install local v-dem tools package
#devtools::install_local("vutils")
library(vutils)

################################################################################
################################################################################

# Taking draws
func_draws <- function(df, VAR, VAR_SD) {
  df_draws <- list()
  iter <- cbind(rep(1, nrow(df)))
  for(i in 1:1800){
    df_draws[[i]] <- df
    df_draws[[i]]$draw <- rnorm(iter, 
                                mean = df[[VAR]], 
                                sd = df[[VAR_SD]])
  }
  df_draws
}


#### Prepare Data for each single latent variable if possible ####

ds.basic.subset.2000 <- readRDS("../data/ds.basic.subset.2000.rds")

# Democracy Stock Variable #
democracy_stock_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, democracy_stock, democracy_stock_sd) 

democracy_stock_data_input_draws <- func_draws(democracy_stock_data_input, "democracy_stock", "democracy_stock_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
democracy_stock_data_input_draws <- lapply(democracy_stock_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
democracy_stock_data_input_draws <- bind_rows(democracy_stock_data_input_draws)

# Step 3: Reshape the data into a wide format
democracy_stock_data_input_draws <- democracy_stock_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(democracy_stock_data_input_draws, file.path("../data/bfa/input/democracy_stock.10000.Z.sample.csv"), row.names = FALSE)

# Legislative Executive Constraints on the executive Variable #
legislative_executive_constraints_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2xlg_legcon, v2xlg_legcon_sd) 

legislative_executive_constraints_data_input_draws <- func_draws(legislative_executive_constraints_data_input, "v2xlg_legcon", "v2xlg_legcon_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
legislative_executive_constraints_data_input_draws <- lapply(legislative_executive_constraints_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
legislative_executive_constraints_data_input_draws <- bind_rows(legislative_executive_constraints_data_input_draws)

# Step 3: Reshape the data into a wide format
legislative_executive_constraints_data_input_draws <- legislative_executive_constraints_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(legislative_executive_constraints_data_input_draws, file.path("../data/bfa/input/legislative_executive_constraints.10000.Z.sample.csv"), row.names = FALSE)


# Horizontal Accountability Index, separate indicators #
# v2juhcind #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2juhcind, v2juhcind_sd) 

horizontal_accountability_data_input_draws <- func_draws(horizontal_accountability_data_input, "v2juhcind", "v2juhcind_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
horizontal_accountability_data_input_draws <- lapply(horizontal_accountability_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
horizontal_accountability_data_input_draws <- bind_rows(horizontal_accountability_data_input_draws)

# Step 3: Reshape the data into a wide format
horizontal_accountability_data_input_draws <- horizontal_accountability_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(horizontal_accountability_data_input_draws, file.path("../data/bfa/input/v2juhcind.10000.Z.sample.csv"), row.names = FALSE)


# Horizontal Accountability Index, separate indicators #
# v2juncind #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2juncind, v2juncind_sd) 

horizontal_accountability_data_input_draws <- func_draws(horizontal_accountability_data_input, "v2juncind", "v2juncind_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
horizontal_accountability_data_input_draws <- lapply(horizontal_accountability_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
horizontal_accountability_data_input_draws <- bind_rows(horizontal_accountability_data_input_draws)

# Step 3: Reshape the data into a wide format
horizontal_accountability_data_input_draws <- horizontal_accountability_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(horizontal_accountability_data_input_draws, file.path("../data/bfa/input/v2juncind.10000.Z.sample.csv"), row.names = FALSE)


# Horizontal Accountability Index, separate indicators #
# v2juhccomp #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2juhccomp, v2juhccomp_sd) 

horizontal_accountability_data_input_draws <- func_draws(horizontal_accountability_data_input, "v2juhccomp", "v2juhccomp_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
horizontal_accountability_data_input_draws <- lapply(horizontal_accountability_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
horizontal_accountability_data_input_draws <- bind_rows(horizontal_accountability_data_input_draws)

# Step 3: Reshape the data into a wide format
horizontal_accountability_data_input_draws <- horizontal_accountability_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(horizontal_accountability_data_input_draws, file.path("data/bfa/input/v2juhccomp.10000.Z.sample.csv"), row.names = FALSE)

# Horizontal Accountability Index, separate indicators #
# v2jucomp #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2jucomp, v2jucomp_sd) 

horizontal_accountability_data_input_draws <- func_draws(horizontal_accountability_data_input, "v2jucomp", "v2jucomp_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
horizontal_accountability_data_input_draws <- lapply(horizontal_accountability_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
horizontal_accountability_data_input_draws <- bind_rows(horizontal_accountability_data_input_draws)

# Step 3: Reshape the data into a wide format
horizontal_accountability_data_input_draws <- horizontal_accountability_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(horizontal_accountability_data_input_draws, file.path("../data/bfa/input/v2jucomp.10000.Z.sample.csv"), row.names = FALSE)

# Horizontal Accountability Index, separate indicators #
# v2exrescon #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2exrescon, v2exrescon_sd) 

horizontal_accountability_data_input_draws <- func_draws(horizontal_accountability_data_input, "v2exrescon", "v2exrescon_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
horizontal_accountability_data_input_draws <- lapply(horizontal_accountability_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
horizontal_accountability_data_input_draws <- bind_rows(horizontal_accountability_data_input_draws)

# Step 3: Reshape the data into a wide format
horizontal_accountability_data_input_draws <- horizontal_accountability_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(horizontal_accountability_data_input_draws, file.path("../data/bfa/input/v2exrescon.10000.Z.sample.csv"), row.names = FALSE)

# Horizontal Accountability Index, separate indicators #
# v2lgotovst #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2lgotovst, v2lgotovst_sd) 

horizontal_accountability_data_input_draws <- func_draws(horizontal_accountability_data_input, "v2lgotovst", "v2lgotovst_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
horizontal_accountability_data_input_draws <- lapply(horizontal_accountability_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
horizontal_accountability_data_input_draws <- bind_rows(horizontal_accountability_data_input_draws)

# Step 3: Reshape the data into a wide format
horizontal_accountability_data_input_draws <- horizontal_accountability_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(horizontal_accountability_data_input_draws, file.path("../data/bfa/input/v2lgotovst.10000.Z.sample.csv"), row.names = FALSE)

# Horizontal Accountability Index, separate indicators #
# v2lginvstp #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2lginvstp, v2lginvstp_sd) 

horizontal_accountability_data_input_draws <- func_draws(horizontal_accountability_data_input, "v2lginvstp", "v2lginvstp_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
horizontal_accountability_data_input_draws <- lapply(horizontal_accountability_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
horizontal_accountability_data_input_draws <- bind_rows(horizontal_accountability_data_input_draws)

# Step 3: Reshape the data into a wide format
horizontal_accountability_data_input_draws <- horizontal_accountability_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(horizontal_accountability_data_input_draws, file.path("../data/bfa/input/v2lginvstp.10000.Z.sample.csv"), row.names = FALSE)

# Horizontal Accountability Index, separate indicators #
# v2lgqstexp #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2lgqstexp, v2lgqstexp_sd) 

horizontal_accountability_data_input_draws <- func_draws(horizontal_accountability_data_input, "v2lgqstexp", "v2lgqstexp_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
horizontal_accountability_data_input_draws <- lapply(horizontal_accountability_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
horizontal_accountability_data_input_draws <- bind_rows(horizontal_accountability_data_input_draws)

# Step 3: Reshape the data into a wide format
horizontal_accountability_data_input_draws <- horizontal_accountability_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(horizontal_accountability_data_input_draws, file.path("../data/bfa/input/v2lgqstexp.10000.Z.sample.csv"), row.names = FALSE)

################################################################################

# Anti-Pluralist Parties Variable #
anti_pluralist_party_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, anti_pluralist_party_index_rev, anti_pluralist_party_index_sd) 

anti_pluralist_party_data_input_draws <- func_draws(anti_pluralist_party_data_input, "anti_pluralist_party_index_rev", "anti_pluralist_party_index_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
anti_pluralist_party_data_input_draws <- lapply(anti_pluralist_party_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
anti_pluralist_party_data_input_draws <- bind_rows(anti_pluralist_party_data_input_draws)

# Step 3: Reshape the data into a wide format
anti_pluralist_party_data_input_draws <- anti_pluralist_party_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(anti_pluralist_party_data_input_draws, file.path("../data/bfa/input/anti_pluralist_party.10000.Z.sample.csv"), row.names = FALSE)

################################################################################

# Polarization Variable #
polarization_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2cacamps_osp_rev, v2cacamps_osp_sd) 

polarization_data_input_draws <- func_draws(polarization_data_input, "v2cacamps_osp_rev", "v2cacamps_osp_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
polarization_data_input_draws <- lapply(polarization_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
polarization_data_input_draws <- bind_rows(polarization_data_input_draws)

# Step 3: Reshape the data into a wide format
polarization_data_input_draws <- polarization_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(polarization_data_input_draws, file.path("../data/bfa/input/polarization.10000.Z.sample.csv"), row.names = FALSE)


################################################################################

# Political Violence Variable #
political_violence_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2caviol_osp_rev, v2caviol_osp_sd) 

political_violence_data_input_draws <- func_draws(political_violence_data_input, "v2caviol_osp_rev", "v2caviol_osp_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
political_violence_data_input_draws <- lapply(political_violence_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
political_violence_data_input_draws <- bind_rows(political_violence_data_input_draws)

# Step 3: Reshape the data into a wide format
political_violence_data_input_draws <- political_violence_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(political_violence_data_input_draws, file.path("../data/bfa/input/political_violence.10000.Z.sample.csv"), row.names = FALSE)


################################################################################

# Robustness of civil society Variable #
civil_society_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2xcs_ccsi, v2xcs_ccsi_sd) 

civil_society_data_input_draws <- func_draws(civil_society_data_input, "v2xcs_ccsi", "v2xcs_ccsi_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
civil_society_data_input_draws <- lapply(civil_society_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
civil_society_data_input_draws <- bind_rows(civil_society_data_input_draws)

# Step 3: Reshape the data into a wide format
civil_society_data_input_draws <- civil_society_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(civil_society_data_input_draws, file.path("../data/bfa/input/civil_society.10000.Z.sample.csv"), row.names = FALSE)


################################################################################

# Civil society Var: v2cseeorgs #
civil_society_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2cseeorgs, v2cseeorgs_sd) 

civil_society_data_input_draws <- func_draws(civil_society_data_input, "v2cseeorgs", "v2cseeorgs_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
civil_society_data_input_draws <- lapply(civil_society_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
civil_society_data_input_draws <- bind_rows(civil_society_data_input_draws)

# Step 3: Reshape the data into a wide format
civil_society_data_input_draws <- civil_society_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(civil_society_data_input_draws, file.path("../data/bfa/input/v2cseeorgs.10000.Z.sample.csv"), row.names = FALSE)


################################################################################

# Civil society Var: v2csreprss #
civil_society_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2csreprss, v2csreprss_sd) 

civil_society_data_input_draws <- func_draws(civil_society_data_input, "v2csreprss", "v2csreprss_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
civil_society_data_input_draws <- lapply(civil_society_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
civil_society_data_input_draws <- bind_rows(civil_society_data_input_draws)

# Step 3: Reshape the data into a wide format
civil_society_data_input_draws <- civil_society_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(civil_society_data_input_draws, file.path("../data/bfa/input/v2csreprss.10000.Z.sample.csv"), row.names = FALSE)

################################################################################

# Civil society Var: v2csprtcpt #
civil_society_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2csprtcpt, v2csprtcpt_sd) 

civil_society_data_input_draws <- func_draws(civil_society_data_input, "v2csprtcpt", "v2csprtcpt_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
civil_society_data_input_draws <- lapply(civil_society_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
civil_society_data_input_draws <- bind_rows(civil_society_data_input_draws)

# Step 3: Reshape the data into a wide format
civil_society_data_input_draws <- civil_society_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(civil_society_data_input_draws, file.path("../data/bfa/input/v2csprtcpt.10000.Z.sample.csv"), row.names = FALSE)


################################################################################

# Participation of civil society Variable #
cs_participation_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2xcs_ccsi, v2xcs_ccsi_sd) 

cs_participation_data_input_draws <- func_draws(cs_participation_data_input, "v2xcs_ccsi", "v2xcs_ccsi_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
cs_participation_data_input_draws <- lapply(cs_participation_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
cs_participation_data_input_draws <- bind_rows(cs_participation_data_input_draws)

# Step 3: Reshape the data into a wide format
cs_participation_data_input_draws <- cs_participation_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(cs_participation_data_input_draws, file.path("../data/bfa/input/cs_participation.10000.Z.sample.csv"), row.names = FALSE)

################################################################################

# Distribution of Power resoruces Variable #
power_resources_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2xeg_eqaccess, v2xeg_eqaccess_sd) 

power_resources_data_input_draws <- func_draws(power_resources_data_input, "v2xeg_eqaccess", "v2xeg_eqaccess_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
power_resources_data_input_draws <- lapply(power_resources_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
power_resources_data_input_draws <- bind_rows(power_resources_data_input_draws)

# Step 3: Reshape the data into a wide format
power_resources_data_input_draws <- power_resources_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(power_resources_data_input_draws, file.path("../data/bfa/input/power_resources.10000.Z.sample.csv"), row.names = FALSE)


################################################################################

# Equal distribution of resources index #
equal_distribution_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2xeg_eqdr, v2xeg_eqdr_sd) 

equal_distribution_data_input_draws <- func_draws(equal_distribution_data_input, "v2xeg_eqdr", "v2xeg_eqdr_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
equal_distribution_data_input_draws <- lapply(equal_distribution_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
equal_distribution_data_input_draws <- bind_rows(equal_distribution_data_input_draws)

# Step 3: Reshape the data into a wide format
equal_distribution_data_input_draws <- equal_distribution_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(equal_distribution_data_input_draws, file.path("../data/bfa/input/equal_distribution.10000.Z.sample.csv"), row.names = FALSE)


################################################################################

# Equal distribution of resources: v2pepwrgen#
equal_distribution_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2pepwrgen, v2pepwrgen_sd) 

equal_distribution_data_input_draws <- func_draws(equal_distribution_data_input, "v2pepwrgen", "v2pepwrgen_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
equal_distribution_data_input_draws <- lapply(equal_distribution_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
equal_distribution_data_input_draws <- bind_rows(equal_distribution_data_input_draws)

# Step 3: Reshape the data into a wide format
equal_distribution_data_input_draws <- equal_distribution_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(equal_distribution_data_input_draws, file.path("../data/bfa/input/v2pepwrgen.10000.Z.sample.csv"), row.names = FALSE)

################################################################################

# Equal distribution of resources: v2pepwrsoc#
equal_distribution_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2pepwrsoc, v2pepwrsoc_sd) 

equal_distribution_data_input_draws <- func_draws(equal_distribution_data_input, "v2pepwrsoc", "v2pepwrsoc_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
equal_distribution_data_input_draws <- lapply(equal_distribution_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
equal_distribution_data_input_draws <- bind_rows(equal_distribution_data_input_draws)

# Step 3: Reshape the data into a wide format
equal_distribution_data_input_draws <- equal_distribution_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(equal_distribution_data_input_draws, file.path("../data/bfa/input/v2pepwrsoc.10000.Z.sample.csv"), row.names = FALSE)


################################################################################

# Equal distribution of resources: v2pepwrses#
equal_distribution_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2pepwrses, v2pepwrses_sd) 

equal_distribution_data_input_draws <- func_draws(equal_distribution_data_input, "v2pepwrses", "v2pepwrses_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
equal_distribution_data_input_draws <- lapply(equal_distribution_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
equal_distribution_data_input_draws <- bind_rows(equal_distribution_data_input_draws)

# Step 3: Reshape the data into a wide format
equal_distribution_data_input_draws <- equal_distribution_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(equal_distribution_data_input_draws, file.path("../data/bfa/input/v2pepwrses.10000.Z.sample.csv"), row.names = FALSE)

################################################################################
# Political Trust  Variable #

legal_system_mood_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, leg, leg_sd) 

legal_system_mood_data_input_draws <- func_draws(legal_system_mood_data_input, "leg", "leg_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
legal_system_mood_data_input_draws <- lapply(legal_system_mood_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
legal_system_mood_data_input_draws <- bind_rows(legal_system_mood_data_input_draws)

# Step 3: Reshape the data into a wide format
legal_system_mood_data_input_draws <- legal_system_mood_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(legal_system_mood_data_input_draws, file.path("../data/bfa/input/legal_system_mood.10000.Z.sample.csv"), row.names = FALSE)


parliament_mood_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, parl, parl_sd) 

parliament_mood_data_input_draws <- func_draws(parliament_mood_data_input, "parl", "parl_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
parliament_mood_data_input_draws <- lapply(parliament_mood_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
parliament_mood_data_input_draws <- bind_rows(parliament_mood_data_input_draws)

# Step 3: Reshape the data into a wide format
parliament_mood_data_input_draws <- parliament_mood_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(parliament_mood_data_input_draws, file.path("../data/bfa/input/parliament_mood.10000.Z.sample.csv"), row.names = FALSE)


government_mood_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, gov, gov_sd) 

government_mood_data_input_draws <- func_draws(government_mood_data_input, "gov", "gov_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
government_mood_data_input_draws <- lapply(government_mood_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
government_mood_data_input_draws <- bind_rows(government_mood_data_input_draws)

# Step 3: Reshape the data into a wide format
government_mood_data_input_draws <- government_mood_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(government_mood_data_input_draws, file.path("../data/bfa/input/government_mood.10000.Z.sample.csv"), row.names = FALSE)

police_mood_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, police, police_sd) 

police_mood_data_input_draws <- func_draws(police_mood_data_input, "police", "police_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
police_mood_data_input_draws <- lapply(police_mood_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
police_mood_data_input_draws <- bind_rows(police_mood_data_input_draws)

# Step 3: Reshape the data into a wide format
police_mood_data_input_draws <- police_mood_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(police_mood_data_input_draws, file.path("../data/bfa/input/police_mood.10000.Z.sample.csv"), row.names = FALSE)


################################################################################
# Confidence in Democracy Variable #
confidence_democracy_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, Satis, Satis_sd) 

confidence_democracy_data_input_draws <- func_draws(confidence_democracy_data_input, "Satis", "Satis_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
confidence_democracy_data_input_draws <- lapply(confidence_democracy_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
confidence_democracy_data_input_draws <- bind_rows(confidence_democracy_data_input_draws)

# Step 3: Reshape the data into a wide format
confidence_democracy_data_input_draws <- confidence_democracy_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(confidence_democracy_data_input_draws, file.path("../data/bfa/input/confidence_democracy.10000.Z.sample.csv"), row.names = FALSE)


################################################################################
# Democratic Mood Variable #
democratic_mood_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, SupDem, SupDem_sd) 

democratic_mood_data_input_draws <- func_draws(democratic_mood_data_input, "SupDem", "SupDem_sd")

# Step 1: Create a new column 'name' that merges 'country_name' and 'historical_date'
democratic_mood_data_input_draws <- lapply(democratic_mood_data_input_draws, function(df) {
  df %>%
    mutate(name = paste(country_text_id, historical_date, sep = " "))
})

# Step 2: Combine all the rows into one dataframe
democratic_mood_data_input_draws <- bind_rows(democratic_mood_data_input_draws)

# Step 3: Reshape the data into a wide format
democratic_mood_data_input_draws <- democratic_mood_data_input_draws %>%
  dplyr::select(name, draw) %>%
  group_by(name) %>%
  mutate(variable = paste0("V", row_number())) %>%
  pivot_wider(names_from = variable, values_from = draw)

write.csv(democratic_mood_data_input_draws, file.path("../data/bfa/input/democratic_mood.10000.Z.sample.csv"), row.names = FALSE)


################################################################################
################################################################################

#### Prepare non latent-variables by using the point estimates 1800 times and store these similar to the latent variables ####

# Function to duplicate a column 1800 times into wide format
duplicate_wide <- function(df, var) {
  # Create a sequence of 1 to 1800 for duplication
  sequence <- 1:1800
  
  # Use tidyr to pivot into wide format
  df_wide <- df %>%
    dplyr::select(country_text_id, historical_date, var) %>%
    mutate(id = row_number()) %>% # Add a row identifier for joining
    mutate(name = paste(country_text_id, historical_date, sep = " ")) %>%
    # Join on the fake sequence to duplicate 1800 times
    crossing(dup_id = sequence) %>%
    pivot_wider(names_from = dup_id, values_from = var, names_prefix = "V") %>%
    dplyr::select(-c(id, country_text_id, historical_date)) # Remove the temporary row identifier
  
  df_wide
}

# Executive Constraints #
executive_contraints_input <-  ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, exconst) 

executive_contraints_input <- duplicate_wide(executive_contraints_input, "exconst")

write.csv(executive_contraints_input, file.path("../data/bfa/input/executive_contraints.10000.Z.sample.csv"), row.names = FALSE)

# Rule of law #
rule_of_law_input <-  ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, legal_system_property_rights) 

rule_of_law_input <- duplicate_wide(rule_of_law_input, "legal_system_property_rights")

write.csv(rule_of_law_input, file.path("../data/bfa/input/rule_of_law.10000.Z.sample.csv"), row.names = FALSE)

# Political Trust #

political_trust_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, political_trust) 

political_trust_data_input <- duplicate_wide(political_trust_data_input, "political_trust")

write.csv(political_trust_data_input, file.path("../data/bfa/input/political_trust.10000.Z.sample.csv"), row.names = FALSE)

# Horizontal Accountability Index, separate indicators #
# v2lgbicam #

horizontal_accountability_data_input <- ds.basic.subset.2000 %>%
  dplyr::select(country_text_id, historical_date, v2lgbicam) 

horizontal_accountability_data_input <- duplicate_wide(horizontal_accountability_data_input, "v2lgbicam")

write.csv(horizontal_accountability_data_input, file.path("../data/bfa/input/v2lgbicam.10000.Z.sample.csv"), row.names = FALSE)

##################################################################################################################################
##################################################################################################################################
##################################################################################################################################

#### Bayesian Factor Analysis

UTABLE <- "data/bfa/country_unit.rds"
INDIR  <- "data/bfa/input/"
OUTDIR <- "data/bfa/results"

dir.create(OUTDIR, showWarnings = F, recursive = T)

stopifnot(dir.exists(INDIR))
stopifnot(dir.exists(OUTDIR))
stopifnot(file.exists(UTABLE))

##### Institutional Dimension #####

SAVE.NAME <- "institutional_dimension"
VARS <- c("democracy_stock", "v2juhcind","v2juncind", "v2juhccomp", "v2jucomp",
          "v2exrescon", "v2lgotovst", "v2lginvstp", "v2lgqstexp")

ITER <- 200
BURNIN <- 10000
MCMC <- 10000
THIN <- 200


sprintf("%d runs with %d sampling iterations, %d burnin, and %d thin",
        ITER, MCMC, BURNIN, THIN) %>% info

utable <- readRDS(UTABLE)
utable_names <- with(utable, paste(country_text_id, historical_date))


# To preserve gaps we need to find where there's jumps in our
# utable. Extract those dates to insert as NA in our individual C vars
# so that when we stretch we don't fill across those periods.
ll <- with(utable, split(year, country_text_id)) %>%
  lapply(function(v) {
    v <- sort(v)
    gapstart <- v[c(diff(v) > 1, F)]
    
    if (length(gapstart) > 0)
      as.Date((gapstart + 1) %^% "-12-31")
  }) %>% Filter(Negate(is.null), .)

gap_dates <- lapply(names(ll), function(s) paste(s, ll[[s]])) %>% unlist

# Load each variable
vars.ll <- lapply(setNames(VARS, VARS), function(varname) {
  m <- locate_z.sample(INDIR, varname) %>% read_matrix
  colnames(m) <- rep(varname, ncol(m))
  
  # If we have any vignettes, chop them off!
  out <- m[!is_vignette(rownames(m)), seq(1, by = ncol(m) / ITER, length.out = ITER)]
  
})

###
# Start by conforming the dimensions b/w all matrices.
cvar_names <- Reduce(union, lapply(vars.ll, rownames))
vars.ll <- lapply(vars.ll, vutils::stretch, cvar_names, gaps = F, preserve.na = T)

# We'll need this after running the model to set the rows we want
# missing (50% of input vars were missing) to NA so that we don't
# stretch over those dates
missing.ma <- lapply(vars.ll, function(ma) rowSums(is.na(ma)) == ncol(ma)) %>%
  do.call(rbind, .)

dropped_dates <- colnames(missing.ma)[colMeans(missing.ma) > .5]

if (length(dropped_dates) > 0) {
  sprintf("Found %d country-dates with >50%% missingness",
          length(dropped_dates)) %>% info
}

vars.ll <- lapply(vars.ll, function(m) m[!rownames(m) %in% dropped_dates,, drop = F])
cvar_names <- setdiff(cvar_names, dropped_dates)

stopifnot(do.call(all_identical, lapply(vars.ll, rownames)))

###
# Initial values. n_obs = total output length of `xi`, i.e. total
# number of unique country-dates.
n_pars <- length(VARS)
n_obs <- length(cvar_names)

sprintf("Found %d total obs", n_obs) %>% info

inits.ll <- lapply(1:4, function(i) {
  list(gamma = mvrnorm(n_pars, c(0, 0), diag(.01, 2)),
       tau = rgamma(n_pars, .01, .01),
       xi = rnorm(n_obs, 0, 1))
})

# Load the necessary library

# Determine the number of cores
num_cores <- 10

# Initialize a cluster with available cores
cl <- makeCluster(num_cores)

# Export necessary variables and functions to the cluster
clusterExport(cl, c("vars.ll", "inits.ll", "jags.model", "update", "coda.samples", "info", "%^%", "BURNIN", 
                    "MCMC", "THIN"))

# Export any required libraries within the cluster
clusterEvalQ(cl, {
  library(coda) # Load the coda library if not already done outside
})

start.time <- Sys.time()

# Use clusterApply or parLapply to run in parallel
posteriors <- parLapply(cl, 1:ITER, function(i) {
  # Load any required packages not already on the cluster
  library(coda) # Just in case, ensure coda is loaded
  
  info("Running model " %^% i)
  full.ma <- do.call(cbind, lapply(vars.ll, function(ma) ma[, i]))
  
  # Normalize prior to running the model
  full.ma <- scale(full.ma)
  
  input.data <- list(n = nrow(full.ma), # n_obs
                     p = ncol(full.ma), # n_pars
                     y = full.ma)
  
  # Divide total runs into 4 groups, each gets same initial values
  inits <- inits.ll[[findInterval(i, c(25, 50, 75)) + 1]]
  
  model <- jags.model(file = "bfa.jag", data = input.data,
                      inits = inits, quiet = T)
  
  update(model, BURNIN)
  mcmc <- coda.samples(model, c("xi", "gamma", "omega"), MCMC, THIN)
  
  # Trust me, life is better when the country-dates are saved
  # together with the posterior object.
  b <- grepl("xi", colnames(mcmc[[1]]), perl = T)
  colnames(mcmc[[1]])[b] <- rownames(full.ma) %^% "_" %^% colnames(mcmc[[1]])[b]
  
  as.mcmc(mcmc)
})

# Stop the cluster after computation
stopCluster(cl)

end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken

mc_assert(posteriors)
info("JAGS models finished")


###
# Check for convergence for each parameter by dividing the runs into
# four pseudo "chains" and then run the Gelman & Rubin diagnostic.
cuts <- findInterval(1:100, c(25, 50, 75), left.open = T)
gelman <- split(posteriors, cuts) %>%
  lapply(function(ll) do.call(rbind, ll) %>% as.mcmc) %>%
  mcmc.list %>%
  gelman.diag(autoburnin = F, multivariate = F)

g <- gelman$psrf
for (p in c("xi", "gamma", "omega")) {
  if (!mean(g[grepl(p, rownames(g)), 1] > 1.1) <= .05) {
    # Save nonconverged posterior objects separately
    file.path(OUTDIR, "nonconverged") %>% dir.create(recursive = T, showWarnings = F)
    file.path(OUTDIR, "nonconverged", sprintf("%s_failed.RData", SAVE.NAME)) %>% save.image
    
    stop("Convergence check failed for " %^% p)
  }
}

file.path(OUTDIR, "mcmc_posteriors") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "mcmc_posteriors", sprintf("post_%s.rds", SAVE.NAME)) %>%
  write_file(posteriors, .)

###
# Extract `xi`, our latent factor, and combine together all
# the runs
combined.posterior <- lapply(posteriors, function(o)  {
  m <- as.matrix(o)
  
  out <- m[, grepl("xi", colnames(m), perl = T)]
  colnames(out) <- sub("_xi.*$", "", colnames(out), perl = T)
  
  out
}) %>% do.call(rbind, .)

# Finally, set the rows where we had >50% missingness to NA
combined.posterior <- add_empty_cols(combined.posterior, dropped_dates)

full_names <- union(utable_names, colnames(combined.posterior))
sprintf("Found %d expanded country-dates", length(full_names)) %>% info

###
# Thin our `xi` posteriors once more for the HLIs. We're only going to
# grab 900 draws to match the dimensions of our z.sample files when
# constructing the HLIs.
if (nrow(combined.posterior) < 900)
  stop("Too few draws in posterior object")

# Stretch the thinned posteriors since we need to clean at least
# frefair according to elecreg.
idx <- seq(1, by = nrow(combined.posterior) / 900, length.out = 900)
thin.ma <- t(combined.posterior[idx, ]) %>%
  stretch(full_names) %>%
  pnorm

file.path(OUTDIR, "thin_post") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "thin_post", sprintf("thin_post_%s.rds", SAVE.NAME)) %>%
  write_file(thin.ma, .)

###
# Summarise and generate point estimates at CD & CY-level.
final_cd.df <- dist_summary(combined.posterior, full_names)

b <- vapply(final_cd.df, is.numeric, logical(1))
final_cd.df[, b] <- lapply(final_cd.df[, b], pnorm)

file.path(OUTDIR, "results", "cd") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "results", "cd", sprintf("bayes.%s_cd.rds", SAVE.NAME)) %>%
  write_file(final_cd.df, .)

final_cy.df <- cy.day_mean(final_cd.df, historical_date, country_text_id)

file.path(OUTDIR, "results", "cy") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "results", "cy", sprintf("bayes.%s_cy.rds", SAVE.NAME)) %>%
  write_file(final_cy.df, .)

info("Finished!")

# Extract factor loadings and uniqueness scores from BFAs
#
# gamma[1] from mcmc-output are the factor loadings with our current bfa.jag
# script. gamma[1] is the slope and gamme[2] the intercept per input variable

# the order must be the same as how the BFA's were run
deps.ll <- list(institutional_dimension = c("democracy_stock", "v2juhcind","v2juncind", "v2juhccomp", "v2jucomp",
                                            "v2exrescon", "v2lgotovst", "v2lginvstp", "v2lgqstexp"))

# Directory that contains the mcmc_posteriors folder
INDIR <- "data/bfa/results"
# Output directory
OUTDIR <- "data/bfa/results"

# filepath posterior file for corruption_index
f <- file.path(INDIR, "mcmc_posteriors", "post_institutional_dimension.rds")

VAR <- gsub("post_", "", f) %>%
  basename %>%
  file_path_sans_ext

posteriors <- read_file(f)

combined.posterior <- lapply(posteriors, function(o)  {
  m <- as.matrix(o)
  m <- m[, grepl("gamma|omega", colnames(m))]
  m
}) %>% do.call(rbind, .)

colnames(combined.posterior)[grepl("gamma\\[\\d+\\,1\\]",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_intercept")

colnames(combined.posterior)[grepl("gamma\\[\\d+\\,2\\]",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_slope")

colnames(combined.posterior)[grepl("omega",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_uniqueness")

combined.posterior %<>% as.data.frame(stringsAsFactors = F)

fu <- function(x) {`^`(x = x, y = 2)}

res <- combined.posterior %>%
  summarize_all(list(median)) %>%
  mutate_at(vars(matches("_uniqueness")), fu)

write_file(res,
           file.path(OUTDIR, "factors_" %^% VAR %^% ".csv"),
           row.names = F)

res_table <- res %>%
  dplyr::select(-c(ends_with("intercept"))) %>%
  pivot_longer(cols= ends_with(c("slope", "uniqueness")), names_to = "Variable", values_to = "Values") 

res_table_uniqueness <- res_table %>%
  filter(str_detect(Variable, "uniqueness"))
res_table_loadings <- res_table %>%
  filter(str_detect(Variable, "slope"))

res_table <- cbind(res_table_loadings, res_table_uniqueness) 

stargazer::stargazer(res_table, summary = FALSE)

modelsummary::datasummary_df(res_table, output = "../data/bfa/BFA_Table_institutional_dimension.docx")

##################################################################################
##################################################################################

##### Political Parties Dimension #####

UTABLE <- "data/bfa/country_unit.rds"
INDIR  <- "data/bfa/input/"
OUTDIR <- "data/bfa/results"

SAVE.NAME <- "political_parties_dimension"
VARS <- c("polarization", "political_violence", "anti_pluralist_party")

ITER <- 200
BURNIN <- 10000
MCMC <- 10000
THIN <- 200


sprintf("%d runs with %d sampling iterations, %d burnin, and %d thin",
        ITER, MCMC, BURNIN, THIN) %>% info

utable <- readRDS(UTABLE)
utable_names <- with(utable, paste(country_text_id, historical_date))


# To preserve gaps we need to find where there's jumps in our
# utable. Extract those dates to insert as NA in our individual C vars
# so that when we stretch we don't fill across those periods.
ll <- with(utable, split(year, country_text_id)) %>%
  lapply(function(v) {
    v <- sort(v)
    gapstart <- v[c(diff(v) > 1, F)]
    
    if (length(gapstart) > 0)
      as.Date((gapstart + 1) %^% "-12-31")
  }) %>% Filter(Negate(is.null), .)

gap_dates <- lapply(names(ll), function(s) paste(s, ll[[s]])) %>% unlist

# Load each variable
vars.ll <- lapply(setNames(VARS, VARS), function(varname) {
  m <- locate_z.sample(INDIR, varname) %>% read_matrix
  colnames(m) <- rep(varname, ncol(m))
  
  # If we have any vignettes, chop them off!
  out <- m[!is_vignette(rownames(m)), seq(1, by = ncol(m) / ITER, length.out = ITER)]
  
})

###
# Start by conforming the dimensions b/w all matrices.
cvar_names <- Reduce(union, lapply(vars.ll, rownames))
vars.ll <- lapply(vars.ll, vutils::stretch, cvar_names, gaps = F, preserve.na = T)

# We'll need this after running the model to set the rows we want
# missing (50% of input vars were missing) to NA so that we don't
# stretch over those dates
missing.ma <- lapply(vars.ll, function(ma) rowSums(is.na(ma)) == ncol(ma)) %>%
  do.call(rbind, .)

dropped_dates <- colnames(missing.ma)[colMeans(missing.ma) > .5]

if (length(dropped_dates) > 0) {
  sprintf("Found %d country-dates with >50%% missingness",
          length(dropped_dates)) %>% info
}

vars.ll <- lapply(vars.ll, function(m) m[!rownames(m) %in% dropped_dates,, drop = F])
cvar_names <- setdiff(cvar_names, dropped_dates)

stopifnot(do.call(all_identical, lapply(vars.ll, rownames)))

###
# Initial values. n_obs = total output length of `xi`, i.e. total
# number of unique country-dates.
n_pars <- length(VARS)
n_obs <- length(cvar_names)

sprintf("Found %d total obs", n_obs) %>% info

inits.ll <- lapply(1:4, function(i) {
  list(gamma = mvrnorm(n_pars, c(0, 0), diag(.01, 2)),
       tau = rgamma(n_pars, .01, .01),
       xi = rnorm(n_obs, 0, 1))
})

# Load the necessary library
library(parallel)

# Determine the number of cores
num_cores <- 10

# Initialize a cluster with available cores
cl <- makeCluster(num_cores)

# Export necessary variables and functions to the cluster
clusterExport(cl, c("vars.ll", "inits.ll", "jags.model", "update", "coda.samples", "info", "%^%", "BURNIN", 
                    "MCMC", "THIN"))

# Export any required libraries within the cluster
clusterEvalQ(cl, {
  library(coda) # Load the coda library if not already done outside
})

start.time <- Sys.time()

# Use clusterApply or parLapply to run in parallel
posteriors <- parLapply(cl, 1:ITER, function(i) {
  # Load any required packages not already on the cluster
  library(coda) # Just in case, ensure coda is loaded
  
  info("Running model " %^% i)
  full.ma <- do.call(cbind, lapply(vars.ll, function(ma) ma[, i]))
  
  # Normalize prior to running the model
  full.ma <- scale(full.ma)
  
  input.data <- list(n = nrow(full.ma), # n_obs
                     p = ncol(full.ma), # n_pars
                     y = full.ma)
  
  # Divide total runs into 4 groups, each gets same initial values
  inits <- inits.ll[[findInterval(i, c(25, 50, 75)) + 1]]
  
  model <- jags.model(file = "bfa.jag", data = input.data,
                      inits = inits, quiet = T)
  
  update(model, BURNIN)
  mcmc <- coda.samples(model, c("xi", "gamma", "omega"), MCMC, THIN)
  
  # Trust me, life is better when the country-dates are saved
  # together with the posterior object.
  b <- grepl("xi", colnames(mcmc[[1]]), perl = T)
  colnames(mcmc[[1]])[b] <- rownames(full.ma) %^% "_" %^% colnames(mcmc[[1]])[b]
  
  as.mcmc(mcmc)
})

# Stop the cluster after computation
stopCluster(cl)

end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken

mc_assert(posteriors)
info("JAGS models finished")

###
# Check for convergence for each parameter by dividing the runs into
# four pseudo "chains" and then run the Gelman & Rubin diagnostic.
cuts <- findInterval(1:100, c(25, 50, 75), left.open = T)
gelman <- split(posteriors, cuts) %>%
  lapply(function(ll) do.call(rbind, ll) %>% as.mcmc) %>%
  mcmc.list %>%
  gelman.diag(autoburnin = F, multivariate = F)

g <- gelman$psrf
for (p in c("xi", "gamma", "omega")) {
  if (!mean(g[grepl(p, rownames(g)), 1] > 1.1) <= .05) {
    # Save nonconverged posterior objects separately
    file.path(OUTDIR, "nonconverged") %>% dir.create(recursive = T, showWarnings = F)
    file.path(OUTDIR, "nonconverged", sprintf("%s_failed.RData", SAVE.NAME)) %>% save.image
    
    stop("Convergence check failed for " %^% p)
  }
}

file.path(OUTDIR, "mcmc_posteriors") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "mcmc_posteriors", sprintf("post_%s.rds", SAVE.NAME)) %>%
  write_file(posteriors, .)

###
# Extract `xi`, our latent factor, and combine together all
# the runs
combined.posterior <- lapply(posteriors, function(o)  {
  m <- as.matrix(o)
  
  out <- m[, grepl("xi", colnames(m), perl = T)]
  colnames(out) <- sub("_xi.*$", "", colnames(out), perl = T)
  
  out
}) %>% do.call(rbind, .)

# Finally, set the rows where we had >50% missingness to NA
combined.posterior <- add_empty_cols(combined.posterior, dropped_dates)

full_names <- union(utable_names, colnames(combined.posterior))
sprintf("Found %d expanded country-dates", length(full_names)) %>% info

###
# Thin our `xi` posteriors once more for the HLIs. We're only going to
# grab 900 draws to match the dimensions of our z.sample files when
# constructing the HLIs.
if (nrow(combined.posterior) < 900)
  stop("Too few draws in posterior object")

# Stretch the thinned posteriors since we need to clean at least
# frefair according to elecreg.
idx <- seq(1, by = nrow(combined.posterior) / 900, length.out = 900)
thin.ma <- t(combined.posterior[idx, ]) %>%
  stretch(full_names) %>%
  pnorm

file.path(OUTDIR, "thin_post") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "thin_post", sprintf("thin_post_%s.rds", SAVE.NAME)) %>%
  write_file(thin.ma, .)

###
# Summarise and generate point estimates at CD & CY-level.
final_cd.df <- dist_summary(combined.posterior, full_names)

b <- vapply(final_cd.df, is.numeric, logical(1))
final_cd.df[, b] <- lapply(final_cd.df[, b], pnorm)

file.path(OUTDIR, "results", "cd") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "results", "cd", sprintf("bayes.%s_cd.rds", SAVE.NAME)) %>%
  write_file(final_cd.df, .)

final_cy.df <- cy.day_mean(final_cd.df, historical_date, country_text_id)

file.path(OUTDIR, "results", "cy") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "results", "cy", sprintf("bayes.%s_cy.rds", SAVE.NAME)) %>%
  write_file(final_cy.df, .)

info("Finished!")

# Extract factor loadings and uniqueness scores from BFAs
#
# gamma[1] from mcmc-output are the factor loadings with our current bfa.jag
# script. gamma[1] is the slope and gamme[2] the intercept per input variable

# the order must be the same as how the BFA's were run
deps.ll <- list(political_parties_dimension = c("polarization", "political_violence", "anti_pluralist_party"))

# Directory that contains the mcmc_posteriors folder
INDIR <- "data/bfa/results"
# Output directory
OUTDIR <- "data/bfa/results"

# filepath posterior file for index
f <- file.path(INDIR, "mcmc_posteriors", "post_political_parties_dimension.rds")

VAR <- gsub("post_", "", f) %>%
  basename %>%
  file_path_sans_ext

posteriors <- read_file(f)

combined.posterior <- lapply(posteriors, function(o)  {
  m <- as.matrix(o)
  m <- m[, grepl("gamma|omega", colnames(m))]
  m
}) %>% do.call(rbind, .)

colnames(combined.posterior)[grepl("gamma\\[\\d+\\,1\\]",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_intercept")

colnames(combined.posterior)[grepl("gamma\\[\\d+\\,2\\]",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_slope")

colnames(combined.posterior)[grepl("omega",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_uniqueness")

combined.posterior %<>% as.data.frame(stringsAsFactors = F)

fu <- function(x) {`^`(x = x, y = 2)}

res <- combined.posterior %>%
  summarize_all(list(median)) %>%
  mutate_at(vars(matches("_uniqueness")), fu)

write_file(res,
           file.path(OUTDIR, "factors_" %^% VAR %^% ".csv"),
           row.names = F)

res_table <- res %>%
  dplyr::select(-c(ends_with("intercept"))) %>%
  pivot_longer(cols= ends_with(c("slope", "uniqueness")), names_to = "Variable", values_to = "Values") 

res_table_uniqueness <- res_table %>%
  filter(str_detect(Variable, "uniqueness"))
res_table_loadings <- res_table %>%
  filter(str_detect(Variable, "slope"))

res_table <- cbind(res_table_loadings, res_table_uniqueness) 

stargazer::stargazer(res_table, summary = FALSE)

modelsummary::datasummary_df(res_table, output = "../data/bfa/BFA_Table_political_parties_dimension.docx")


##################################################################################
##################################################################################

##### Civil Society and Civic Culture Dimension #####

UTABLE <- "data/bfa/country_unit.rds"
INDIR  <- "data/bfa/input/"
OUTDIR <- "data/bfa/results"

SAVE.NAME <- "civil_society_dimension"
VARS <- c("v2pepwrgen", "v2pepwrsoc", "v2pepwrses", "v2cseeorgs", "v2csreprss", "v2csprtcpt")

ITER <- 200
BURNIN <- 10000
MCMC <- 10000
THIN <- 200


sprintf("%d runs with %d sampling iterations, %d burnin, and %d thin",
        ITER, MCMC, BURNIN, THIN) %>% info

utable <- readRDS(UTABLE)
utable_names <- with(utable, paste(country_text_id, historical_date))


# To preserve gaps we need to find where there's jumps in our
# utable. Extract those dates to insert as NA in our individual C vars
# so that when we stretch we don't fill across those periods.
ll <- with(utable, split(year, country_text_id)) %>%
  lapply(function(v) {
    v <- sort(v)
    gapstart <- v[c(diff(v) > 1, F)]
    
    if (length(gapstart) > 0)
      as.Date((gapstart + 1) %^% "-12-31")
  }) %>% Filter(Negate(is.null), .)

gap_dates <- lapply(names(ll), function(s) paste(s, ll[[s]])) %>% unlist

# Load each variable
vars.ll <- lapply(setNames(VARS, VARS), function(varname) {
  m <- locate_z.sample(INDIR, varname) %>% read_matrix
  colnames(m) <- rep(varname, ncol(m))
  
  # If we have any vignettes, chop them off!
  out <- m[!is_vignette(rownames(m)), seq(1, by = ncol(m) / ITER, length.out = ITER)]
  
})

###
# Start by conforming the dimensions b/w all matrices.
cvar_names <- Reduce(union, lapply(vars.ll, rownames))
vars.ll <- lapply(vars.ll, vutils::stretch, cvar_names, gaps = F, preserve.na = T)

# We'll need this after running the model to set the rows we want
# missing (50% of input vars were missing) to NA so that we don't
# stretch over those dates
missing.ma <- lapply(vars.ll, function(ma) rowSums(is.na(ma)) == ncol(ma)) %>%
  do.call(rbind, .)

dropped_dates <- colnames(missing.ma)[colMeans(missing.ma) > .5]

if (length(dropped_dates) > 0) {
  sprintf("Found %d country-dates with >50%% missingness",
          length(dropped_dates)) %>% info
}

vars.ll <- lapply(vars.ll, function(m) m[!rownames(m) %in% dropped_dates,, drop = F])
cvar_names <- setdiff(cvar_names, dropped_dates)

stopifnot(do.call(all_identical, lapply(vars.ll, rownames)))

###
# Initial values. n_obs = total output length of `xi`, i.e. total
# number of unique country-dates.
n_pars <- length(VARS)
n_obs <- length(cvar_names)

sprintf("Found %d total obs", n_obs) %>% info

inits.ll <- lapply(1:4, function(i) {
  list(gamma = mvrnorm(n_pars, c(0, 0), diag(.01, 2)),
       tau = rgamma(n_pars, .01, .01),
       xi = rnorm(n_obs, 0, 1))
})

# Load the necessary library
library(parallel)

# Determine the number of cores
num_cores <- 10

# Initialize a cluster with available cores
cl <- makeCluster(num_cores)

# Export necessary variables and functions to the cluster
clusterExport(cl, c("vars.ll", "inits.ll", "jags.model", "update", "coda.samples", "info", "%^%", "BURNIN", 
                    "MCMC", "THIN"))

# Export any required libraries within the cluster
clusterEvalQ(cl, {
  library(coda) # Load the coda library if not already done outside
})

start.time <- Sys.time()

# Use clusterApply or parLapply to run in parallel
posteriors <- parLapply(cl, 1:ITER, function(i) {
  # Load any required packages not already on the cluster
  library(coda) # Just in case, ensure coda is loaded
  
  info("Running model " %^% i)
  full.ma <- do.call(cbind, lapply(vars.ll, function(ma) ma[, i]))
  
  # Normalize prior to running the model
  full.ma <- scale(full.ma)
  
  input.data <- list(n = nrow(full.ma), # n_obs
                     p = ncol(full.ma), # n_pars
                     y = full.ma)
  
  # Divide total runs into 4 groups, each gets same initial values
  inits <- inits.ll[[findInterval(i, c(25, 50, 75)) + 1]]
  
  model <- jags.model(file = "bfa.jag", data = input.data,
                      inits = inits, quiet = T)
  
  update(model, BURNIN)
  mcmc <- coda.samples(model, c("xi", "gamma", "omega"), MCMC, THIN)
  
  # Trust me, life is better when the country-dates are saved
  # together with the posterior object.
  b <- grepl("xi", colnames(mcmc[[1]]), perl = T)
  colnames(mcmc[[1]])[b] <- rownames(full.ma) %^% "_" %^% colnames(mcmc[[1]])[b]
  
  as.mcmc(mcmc)
})

# Stop the cluster after computation
stopCluster(cl)

end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken

mc_assert(posteriors)
info("JAGS models finished")

###
# Check for convergence for each parameter by dividing the runs into
# four pseudo "chains" and then run the Gelman & Rubin diagnostic.
cuts <- findInterval(1:100, c(25, 50, 75), left.open = T)
gelman <- split(posteriors, cuts) %>%
  lapply(function(ll) do.call(rbind, ll) %>% as.mcmc) %>%
  mcmc.list %>%
  gelman.diag(autoburnin = F, multivariate = F)

g <- gelman$psrf
for (p in c("xi", "gamma", "omega")) {
  if (!mean(g[grepl(p, rownames(g)), 1] > 1.1) <= .05) {
    # Save nonconverged posterior objects separately
    file.path(OUTDIR, "nonconverged") %>% dir.create(recursive = T, showWarnings = F)
    file.path(OUTDIR, "nonconverged", sprintf("%s_failed.RData", SAVE.NAME)) %>% save.image
    
    stop("Convergence check failed for " %^% p)
  }
}

file.path(OUTDIR, "mcmc_posteriors") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "mcmc_posteriors", sprintf("post_%s.rds", SAVE.NAME)) %>%
  write_file(posteriors, .)

###
# Extract `xi`, our latent factor, and combine together all
# the runs
combined.posterior <- lapply(posteriors, function(o)  {
  m <- as.matrix(o)
  
  out <- m[, grepl("xi", colnames(m), perl = T)]
  colnames(out) <- sub("_xi.*$", "", colnames(out), perl = T)
  
  out
}) %>% do.call(rbind, .)

# Finally, set the rows where we had >50% missingness to NA
combined.posterior <- add_empty_cols(combined.posterior, dropped_dates)

full_names <- union(utable_names, colnames(combined.posterior))
sprintf("Found %d expanded country-dates", length(full_names)) %>% info

###
# Thin our `xi` posteriors once more for the HLIs. We're only going to
# grab 900 draws to match the dimensions of our z.sample files when
# constructing the HLIs.
if (nrow(combined.posterior) < 900)
  stop("Too few draws in posterior object")

# Stretch the thinned posteriors since we need to clean at least
# frefair according to elecreg.
idx <- seq(1, by = nrow(combined.posterior) / 900, length.out = 900)
thin.ma <- t(combined.posterior[idx, ]) %>%
  stretch(full_names) %>%
  pnorm

file.path(OUTDIR, "thin_post") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "thin_post", sprintf("thin_post_%s.rds", SAVE.NAME)) %>%
  write_file(thin.ma, .)

###
# Summarise and generate point estimates at CD & CY-level.
final_cd.df <- dist_summary(combined.posterior, full_names)

b <- vapply(final_cd.df, is.numeric, logical(1))
final_cd.df[, b] <- lapply(final_cd.df[, b], pnorm)

file.path(OUTDIR, "results", "cd") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "results", "cd", sprintf("bayes.%s_cd.rds", SAVE.NAME)) %>%
  write_file(final_cd.df, .)

final_cy.df <- cy.day_mean(final_cd.df, historical_date, country_text_id)

file.path(OUTDIR, "results", "cy") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "results", "cy", sprintf("bayes.%s_cy.rds", SAVE.NAME)) %>%
  write_file(final_cy.df, .)

info("Finished!")

# Extract factor loadings and uniqueness scores from BFAs
#
# gamma[1] from mcmc-output are the factor loadings with our current bfa.jag
# script. gamma[1] is the slope and gamme[2] the intercept per input variable

# the order must be the same as how the BFA's were run
deps.ll <- list(civil_society_dimension = c("v2pepwrgen", "v2pepwrsoc", "v2pepwrses", "v2cseeorgs", "v2csreprss", "v2csprtcpt"))

# Directory that contains the mcmc_posteriors folder
INDIR <- "data/bfa/results"
# Output directory
OUTDIR <- "data/bfa/results"

# filepath posterior file for corruption_index
f <- file.path(INDIR, "mcmc_posteriors", "post_civil_society_dimension.rds")

VAR <- gsub("post_", "", f) %>%
  basename %>%
  file_path_sans_ext

posteriors <- read_file(f)

combined.posterior <- lapply(posteriors, function(o)  {
  m <- as.matrix(o)
  m <- m[, grepl("gamma|omega", colnames(m))]
  m
}) %>% do.call(rbind, .)

colnames(combined.posterior)[grepl("gamma\\[\\d+\\,1\\]",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_intercept")

colnames(combined.posterior)[grepl("gamma\\[\\d+\\,2\\]",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_slope")

colnames(combined.posterior)[grepl("omega",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_uniqueness")

combined.posterior %<>% as.data.frame(stringsAsFactors = F)

fu <- function(x) {`^`(x = x, y = 2)}

res <- combined.posterior %>%
  summarize_all(list(median)) %>%
  mutate_at(vars(matches("_uniqueness")), fu)

write_file(res,
           file.path(OUTDIR, "factors_" %^% VAR %^% ".csv"),
           row.names = F)

res_table <- res %>%
  dplyr::select(-c(ends_with("intercept"))) %>%
  pivot_longer(cols= ends_with(c("slope", "uniqueness")), names_to = "Variable", values_to = "Values") 

res_table_uniqueness <- res_table %>%
  filter(str_detect(Variable, "uniqueness"))
res_table_loadings <- res_table %>%
  filter(str_detect(Variable, "slope"))

res_table <- cbind(res_table_loadings, res_table_uniqueness) 

stargazer::stargazer(res_table, summary = FALSE)

modelsummary::datasummary_df(res_table, output = "../data/bfa/BFA_Table_civil_society_dimension.docx")

##################################################################################
##################################################################################

##### Political Community Dimension #####

UTABLE <- "data/bfa/country_unit.rds"
INDIR  <- "data/bfa/input/"
OUTDIR <- "data/bfa/results"

SAVE.NAME <- "political_community_dimension"
VARS <- c("legal_system_mood", "police_mood", "parliament_mood", "government_mood","confidence_democracy")

ITER <- 200
BURNIN <- 10000
MCMC <- 10000
THIN <- 200


sprintf("%d runs with %d sampling iterations, %d burnin, and %d thin",
        ITER, MCMC, BURNIN, THIN) %>% info

utable <- readRDS(UTABLE)
utable_names <- with(utable, paste(country_text_id, historical_date))


# To preserve gaps we need to find where there's jumps in our
# utable. Extract those dates to insert as NA in our individual C vars
# so that when we stretch we don't fill across those periods.
ll <- with(utable, split(year, country_text_id)) %>%
  lapply(function(v) {
    v <- sort(v)
    gapstart <- v[c(diff(v) > 1, F)]
    
    if (length(gapstart) > 0)
      as.Date((gapstart + 1) %^% "-12-31")
  }) %>% Filter(Negate(is.null), .)

gap_dates <- lapply(names(ll), function(s) paste(s, ll[[s]])) %>% unlist

# Load each variable
vars.ll <- lapply(setNames(VARS, VARS), function(varname) {
  m <- locate_z.sample(INDIR, varname) %>% read_matrix
  colnames(m) <- rep(varname, ncol(m))
  
  # If we have any vignettes, chop them off!
  out <- m[!is_vignette(rownames(m)), seq(1, by = ncol(m) / ITER, length.out = ITER)]
  
})

###
# Start by conforming the dimensions b/w all matrices.
cvar_names <- Reduce(union, lapply(vars.ll, rownames))
vars.ll <- lapply(vars.ll, vutils::stretch, cvar_names, gaps = F, preserve.na = T)

# We'll need this after running the model to set the rows we want
# missing (50% of input vars were missing) to NA so that we don't
# stretch over those dates
missing.ma <- lapply(vars.ll, function(ma) rowSums(is.na(ma)) == ncol(ma)) %>%
  do.call(rbind, .)

dropped_dates <- colnames(missing.ma)[colMeans(missing.ma) > .5]

if (length(dropped_dates) > 0) {
  sprintf("Found %d country-dates with >50%% missingness",
          length(dropped_dates)) %>% info
}

vars.ll <- lapply(vars.ll, function(m) m[!rownames(m) %in% dropped_dates,, drop = F])
cvar_names <- setdiff(cvar_names, dropped_dates)

stopifnot(do.call(all_identical, lapply(vars.ll, rownames)))

###
# Initial values. n_obs = total output length of `xi`, i.e. total
# number of unique country-dates.
n_pars <- length(VARS)
n_obs <- length(cvar_names)

sprintf("Found %d total obs", n_obs) %>% info

inits.ll <- lapply(1:4, function(i) {
  list(gamma = mvrnorm(n_pars, c(0, 0), diag(.01, 2)),
       tau = rgamma(n_pars, .01, .01),
       xi = rnorm(n_obs, 0, 1))
})

# Load the necessary library
library(parallel)

# Determine the number of cores
num_cores <- 10

# Initialize a cluster with available cores
cl <- makeCluster(num_cores)

# Export necessary variables and functions to the cluster
clusterExport(cl, c("vars.ll", "inits.ll", "jags.model", "update", "coda.samples", "info", "%^%", "BURNIN", 
                    "MCMC", "THIN"))

# Export any required libraries within the cluster
clusterEvalQ(cl, {
  library(coda) # Load the coda library if not already done outside
})

start.time <- Sys.time()

# Use clusterApply or parLapply to run in parallel
posteriors <- parLapply(cl, 1:ITER, function(i) {
  # Load any required packages not already on the cluster
  library(coda) # Just in case, ensure coda is loaded
  
  info("Running model " %^% i)
  full.ma <- do.call(cbind, lapply(vars.ll, function(ma) ma[, i]))
  
  # Normalize prior to running the model
  full.ma <- scale(full.ma)
  
  input.data <- list(n = nrow(full.ma), # n_obs
                     p = ncol(full.ma), # n_pars
                     y = full.ma)
  
  # Divide total runs into 4 groups, each gets same initial values
  inits <- inits.ll[[findInterval(i, c(25, 50, 75)) + 1]]
  
  model <- jags.model(file = "bfa.jag", data = input.data,
                      inits = inits, quiet = T)
  
  update(model, BURNIN)
  mcmc <- coda.samples(model, c("xi", "gamma", "omega"), MCMC, THIN)
  
  # Trust me, life is better when the country-dates are saved
  # together with the posterior object.
  b <- grepl("xi", colnames(mcmc[[1]]), perl = T)
  colnames(mcmc[[1]])[b] <- rownames(full.ma) %^% "_" %^% colnames(mcmc[[1]])[b]
  
  as.mcmc(mcmc)
})

# Stop the cluster after computation
stopCluster(cl)

end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken

mc_assert(posteriors)
info("JAGS models finished")


###
# Check for convergence for each parameter by dividing the runs into
# four pseudo "chains" and then run the Gelman & Rubin diagnostic.
cuts <- findInterval(1:100, c(25, 50, 75), left.open = T)
gelman <- split(posteriors, cuts) %>%
  lapply(function(ll) do.call(rbind, ll) %>% as.mcmc) %>%
  mcmc.list %>%
  gelman.diag(autoburnin = F, multivariate = F)

g <- gelman$psrf
for (p in c("xi", "gamma", "omega")) {
  if (!mean(g[grepl(p, rownames(g)), 1] > 1.1) <= .05) {
    # Save nonconverged posterior objects separately
    file.path(OUTDIR, "nonconverged") %>% dir.create(recursive = T, showWarnings = F)
    file.path(OUTDIR, "nonconverged", sprintf("%s_failed.RData", SAVE.NAME)) %>% save.image
    
    stop("Convergence check failed for " %^% p)
  }
}

file.path(OUTDIR, "mcmc_posteriors") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "mcmc_posteriors", sprintf("post_%s.rds", SAVE.NAME)) %>%
  write_file(posteriors, .)

###
# Extract `xi`, our latent factor, and combine together all
# the runs
combined.posterior <- lapply(posteriors, function(o)  {
  m <- as.matrix(o)
  
  out <- m[, grepl("xi", colnames(m), perl = T)]
  colnames(out) <- sub("_xi.*$", "", colnames(out), perl = T)
  
  out
}) %>% do.call(rbind, .)

# Finally, set the rows where we had >50% missingness to NA
combined.posterior <- add_empty_cols(combined.posterior, dropped_dates)

full_names <- union(utable_names, colnames(combined.posterior))
sprintf("Found %d expanded country-dates", length(full_names)) %>% info

###
# Thin our `xi` posteriors once more for the HLIs. We're only going to
# grab 900 draws to match the dimensions of our z.sample files when
# constructing the HLIs.
if (nrow(combined.posterior) < 900)
  stop("Too few draws in posterior object")

# Stretch the thinned posteriors since we need to clean at least
# frefair according to elecreg.
idx <- seq(1, by = nrow(combined.posterior) / 900, length.out = 900)
thin.ma <- t(combined.posterior[idx, ]) %>%
  stretch(full_names) %>%
  pnorm

file.path(OUTDIR, "thin_post") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "thin_post", sprintf("thin_post_%s.rds", SAVE.NAME)) %>%
  write_file(thin.ma, .)

###
# Summarise and generate point estimates at CD & CY-level.
final_cd.df <- dist_summary(combined.posterior, full_names)

b <- vapply(final_cd.df, is.numeric, logical(1))
final_cd.df[, b] <- lapply(final_cd.df[, b], pnorm)

file.path(OUTDIR, "results", "cd") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "results", "cd", sprintf("bayes.%s_cd.rds", SAVE.NAME)) %>%
  write_file(final_cd.df, .)

final_cy.df <- cy.day_mean(final_cd.df, historical_date, country_text_id)

file.path(OUTDIR, "results", "cy") %>% dir.create(recursive = T, showWarnings = F)
file.path(OUTDIR, "results", "cy", sprintf("bayes.%s_cy.rds", SAVE.NAME)) %>%
  write_file(final_cy.df, .)

info("Finished!")

# Extract factor loadings and uniqueness scores from BFAs
#
# gamma[1] from mcmc-output are the factor loadings with our current bfa.jag
# script. gamma[1] is the slope and gamme[2] the intercept per input variable

# the order must be the same as how the BFA's were run
deps.ll <- list(political_community_dimension = c("legal_system_mood", "police_mood", "parliament_mood", "government_mood","confidence_democracy"))

# Directory that contains the mcmc_posteriors folder
INDIR <- "data/bfa/results"
# Output directory
OUTDIR <- "data/bfa/results"

# filepath posterior file for corruption_index
f <- file.path(INDIR, "mcmc_posteriors", "post_political_community_dimension.rds")

VAR <- gsub("post_", "", f) %>%
  basename %>%
  file_path_sans_ext

posteriors <- read_file(f)

combined.posterior <- lapply(posteriors, function(o)  {
  m <- as.matrix(o)
  m <- m[, grepl("gamma|omega", colnames(m))]
  m
}) %>% do.call(rbind, .)

colnames(combined.posterior)[grepl("gamma\\[\\d+\\,1\\]",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_intercept")

colnames(combined.posterior)[grepl("gamma\\[\\d+\\,2\\]",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_slope")

colnames(combined.posterior)[grepl("omega",
                                   colnames(combined.posterior))] <-
  paste0(deps.ll[[VAR]], "_uniqueness")

combined.posterior %<>% as.data.frame(stringsAsFactors = F)

fu <- function(x) {`^`(x = x, y = 2)}

res <- combined.posterior %>%
  summarize_all(list(median)) %>%
  mutate_at(vars(matches("_uniqueness")), fu)

write_file(res,
           file.path(OUTDIR, "factors_" %^% VAR %^% ".csv"),
           row.names = F)

res_table <- res %>%
  dplyr::select(-c(ends_with("intercept"))) %>%
  pivot_longer(cols= ends_with(c("slope", "uniqueness")), names_to = "Variable", values_to = "Values") 

res_table_uniqueness <- res_table %>%
  filter(str_detect(Variable, "uniqueness"))
res_table_loadings <- res_table %>%
  filter(str_detect(Variable, "slope"))

res_table <- cbind(res_table_loadings, res_table_uniqueness) 

stargazer::stargazer(res_table, summary = FALSE)

modelsummary::datasummary_df(res_table, output = "../data/bfa/BFA_Table_political_community_dimension.docx")
